#!/usr/bin/env python
# -*- coding:utf-8 -*-

from re import findall,I
from lib.utils.check import *
from lib.utils.printer import *
from lib.utils.readfile import *
from lib.request.request import *

class robots(Request):
    """
    Check a target for robots.txt and probe the paths it disallows.

    The HTTP transport comes from the ``Request`` base class; this class
    only builds the URLs, parses the file and records what it found in
    ``self.result['robots']``.
    """
    get = "GET"

    def __init__(self,kwargs,url,data):
        """
        :param kwargs: options forwarded unchanged to ``Request.__init__``
        :param url: target URL; only its netloc part is used by ``check``
        :param data: stored on the instance, not used by this class
        """
        Request.__init__(self,kwargs)
        self.url = url
        self.data = data
        # 'robots' stays None until at least one Disallow path is found.
        self.result = {
            'robots':None
        }

    def check(self):
        """
        Fetch ``<netloc>/robots.txt``, extract every ``Disallow`` path,
        request each of them and record the paths.

        :return: ``self.result``; ``result['robots']`` is ``None`` when
                 nothing was found, otherwise the discovered paths joined
                 with ``' ,'``.
        """
        info("检测robots.txt...")
        paths = []
        netlocUrl = SplitUrl(self.url).netloc
        robotsUrl = Cpath(netlocUrl,'robots.txt')
        more("检测载荷:{}".format(robotsUrl))
        req = self.Send(url=robotsUrl,method=self.get)
        # Only trust the body when the answer is a direct 200 for the URL
        # we asked for (a redirect to another page would change req.url).
        if req.code == 200 and CEndUrl(req.url) == robotsUrl and req.content:
            # Match whole "Disallow:" directives, one per line, in any
            # letter case and with optional blanks around the colon.
            # [ \t]* (not \s*) keeps the match on a single line so an empty
            # "Disallow:" never swallows the following directive.
            paths = findall(
                r'(?m)^[ \t]*disallow[ \t]*:[ \t]*(\S+)',
                req.content,
                I
            )
        if not paths:
            info_nothing()
            return self.result
        plus('robots.txt中允许访问的地址: %s'%(req.url))
        for path in paths:
            probe = self.Send(url=Cpath(netlocUrl,path),method=self.get)
            more('[%s] %s'%(probe.code,probe.url))
            # First hit initialises the value, later hits are appended.
            if not self.result['robots']:
                self.result['robots'] = path
            else:
                self.result['robots'] += ' ,' + path
        return self.result
def run(kwargs,url,data):
    """
    Module entry point: build a ``robots`` scanner from the given
    arguments and return whatever its ``check()`` method returns.
    """
    scanner = robots(kwargs,url,data)
    return scanner.check()
